import pandas as pd
import pickle as pk
from tqdm import tqdm
import json
import os.path

import sys
# Command-line interface: the single positional argument is the integer id of
# the batch to process.
args = sys.argv
if len(args) < 2:
    sys.exit('Usage: python {0} <batch_id>'.format(args[0]))
batch_id = int(args[1])

print('Loading files.')

# NOTE(review): read_pickle executes arbitrary code embedded in the pickle;
# only use it on a batches file produced by a trusted step of this pipeline.
batches = pd.read_pickle('../data/batches.pk')

# The CSV paths belonging to this batch.
filenames = batches[batch_id]

# Files that are skipped without attempting to read them.
KNOWN_EMPTY_FILES = frozenset([
    '../data/gpo_sentences/1994-02-01_528.csv',
    '../data/gpo_sentences/1994-01-31_507.csv',
    '../data/gpo_sentences/1994-02-01_549.csv',
    '../data/gpo_sentences/1994-01-31_430.csv',
])

list_of_dataframes = []

for filename in tqdm(filenames):
    if filename in KNOWN_EMPTY_FILES:
        print('Empty file.')
        continue
    try:
        temp = pd.read_csv(filename)
    except pd.errors.EmptyDataError:
        # Any other file without data is reported and skipped the same way
        # instead of aborting the whole batch.
        print('Empty file.')
        continue
    # Record which file every row came from.
    temp['doc'] = filename
    list_of_dataframes.append(temp)

# Stack all per-file frames into one table and persist it for this batch.
split_sentences = pd.concat(list_of_dataframes, ignore_index=True)
split_sentences.to_csv('../data/gpo_split_sentences_by_batch/split_sentences_{0}.csv'.format(batch_id), index=False)

# Load the persisted sentence table of this batch once; every chunk below is
# sliced from this in-memory frame instead of re-reading the CSV per chunk.
split_sentences = pd.read_csv('../data/gpo_split_sentences_by_batch/split_sentences_{0}.csv'.format(batch_id))
len_batch = len(split_sentences)

# Number of rows handed to each run_srl call.
size = 10000

# Output location of one chunk: {0} is the batch id, {1} the chunk's start row.
srl_output_template = "../data/gpo_srl_annotations/srl_res_small_{0}_{1}.json"

for i in range(0, len_batch, size):
    print(i)
    srl_output_path = srl_output_template.format(batch_id, i)
    # A chunk whose output file already exists is skipped, so an interrupted
    # run can be restarted without redoing those chunks.
    if os.path.isfile(srl_output_path):
        continue
    print('Dealing with it...')
    # iloc slices clamp at the end of the frame, so the final (shorter) chunk
    # needs no special-casing.
    chunk_sentences = list(split_sentences.iloc[i:i + size]['sentence'])
    # Imported only when there is work to do, so a fully processed batch never
    # loads the SRL package.
    from narrativeNLP.wrappers import run_srl
    # NOTE(review): presumably run_srl writes its result to `save_to_disk`,
    # which is what the existence check above relies on -- confirm.
    srl_res = run_srl(
        path="https://storage.googleapis.com/allennlp-public-models/openie-model.2020.03.26.tar.gz",
        sentences=chunk_sentences,
        save_to_disk=srl_output_path,
        batch_size=25,
        progress_bar=True,
        )
